# install.packages("sf")
# install.packages("leaflet")
library(sf)
library(tidyverse)
library(leaflet)

Analyzing Aggravated Burglaries in Davidson County

Part 1: Data Preparation

I have been provided these three datasets for this project:

  1. burglaries_2023.csv: Contains data on the aggravated burglary incidents in Davidson County. This was obtained from https://data.nashville.gov/Police/Metro-Nashville-Police-Department-Incidents/2u6v-ujjs.
# read the csv of burglaries data into the notebook

burglary_incidents <- read_csv('../data/burglaries_2023.csv')

# view the dataset

burglary_incidents
NA

Investigate the ethnicity column some:


# investigate the ethnicity column some

unique(burglary_incidents[["victim_ethnicity"]])
[1] "Non-Hispanic" NA             "Hispanic"     "Unknown"     
  1. census.csv: Census tract level data on population and median income. This was obtained from the US Census American Community Survey.

# read the csv of census data into the notebook

census <- read_csv('../data/census.csv')

# view the dataset

census
NA
  1. DC: A shapefile containing Davidson County census tracts

# read in the shape file data for Davidson county census tracts
# read in the DC file data

dav_cty_census_tracts <- read_sf('../data/DC/DC.shp')
dav_cty_census_tracts
Simple feature collection with 174 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -87.0547 ymin: 35.96778 xmax: -86.51559 ymax: 36.4055
Geodetic CRS:  NAD83
burglary_incidents
dav_cty_census_tracts |> 
  ggplot() +
  geom_sf()

dav_cty_census_tracts |> 
  ggplot() +
  geom_sf(aes(fill = ALAND))

Perform a spatial join to determine the census tract in which each burglary occurred. Hint: You may want to make use of the st_as_sf function in order to convert the burglaries data into an sf object.


# performed spatial join 

burglary_incidents

burglary_incidents_mapped <- st_as_sf(
  burglary_incidents |> 
    drop_na(latitude) |> 
    drop_na(longitude),
    coords = c('longitude', 'latitude'),
    crs = st_crs(dav_cty_census_tracts)
)

burglary_incidents_mapped
Simple feature collection with 1146 features and 27 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -92.51 ymin: 34.15 xmax: -86.557 ymax: 36.34
Geodetic CRS:  NAD83

Rename column in Davidson county census tract data so that the merge goes more smoothly.


dav_cty_census_tracts <- rename(dav_cty_census_tracts, tract_name = NAME)

Merge census csv data with dav_cty_census_tracts DC shape file data.


census_tracts <- merge(dav_cty_census_tracts, census, by.x = "TRACTCE", by.y = "tract", all = TRUE)

census_tracts
Simple feature collection with 174 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -87.0547 ymin: 35.96778 xmax: -86.51559 ymax: 36.4055
Geodetic CRS:  NAD83
First 10 features:
   TRACTCE STATEFP COUNTYFP       GEOID tract_name            NAMELSAD MTFCC FUNCSTAT    ALAND AWATER
1   010103      47      037 47037010103     101.03 Census Tract 101.03 G5020        S 48034082  61097
2   010104      47      037 47037010104     101.04 Census Tract 101.04 G5020        S 65057849 251504
3   010105      47      037 47037010105     101.05 Census Tract 101.05 G5020        S 28328799   1093
4   010106      47      037 47037010106     101.06 Census Tract 101.06 G5020        S 21616474   6845
5   010201      47      037 47037010201     102.01 Census Tract 102.01 G5020        S 23718545      0
6   010202      47      037 47037010202     102.02 Census Tract 102.02 G5020        S 68394934  77571
7   010301      47      037 47037010301     103.01 Census Tract 103.01 G5020        S  8527942  11775
8   010302      47      037 47037010302     103.02 Census Tract 103.02 G5020        S  4179336   6813
9   010303      47      037 47037010303     103.03 Census Tract 103.03 G5020        S  4508896 142888
10  010401      47      037 47037010401     104.01 Census Tract 104.01 G5020        S  9543414 320298
      INTPTLAT     INTPTLON                                            NAME state county population
1  +36.3444054 -086.8608396 Census Tract 101.03, Davidson County, Tennessee    47    037       2411
2  +36.2940028 -086.8777483 Census Tract 101.04, Davidson County, Tennessee    47    037       3002
3  +36.2504208 -086.8521501 Census Tract 101.05, Davidson County, Tennessee    47    037       4839
4  +36.2610013 -086.8023491 Census Tract 101.06, Davidson County, Tennessee    47    037       2948
5  +36.2882537 -086.7728157 Census Tract 102.01, Davidson County, Tennessee    47    037       4283
6  +36.3619781 -086.7746355 Census Tract 102.02, Davidson County, Tennessee    47    037       3919
7  +36.3161492 -086.7261435 Census Tract 103.01, Davidson County, Tennessee    47    037       3914
8  +36.3139482 -086.7125964 Census Tract 103.02, Davidson County, Tennessee    47    037       1589
9  +36.3132279 -086.7006728 Census Tract 103.03, Davidson County, Tennessee    47    037       5114
10 +36.2943965 -086.6864670 Census Tract 104.01, Davidson County, Tennessee    47    037       4734
   median_income                       geometry
1          60000 MULTIPOLYGON (((-86.91752 3...
2          84831 MULTIPOLYGON (((-86.9744 36...
3          61115 MULTIPOLYGON (((-86.89144 3...
4          66940 MULTIPOLYGON (((-86.83089 3...
5          69185 MULTIPOLYGON (((-86.81736 3...
6          81695 MULTIPOLYGON (((-86.82483 3...
7          52806 MULTIPOLYGON (((-86.74132 3...
8          50341 MULTIPOLYGON (((-86.72469 3...
9          46604 MULTIPOLYGON (((-86.71971 3...
10         47025 MULTIPOLYGON (((-86.71149 3...
# burglary_incidents_mapped

census_tracts |> 
  ggplot() +
  geom_sf()


burglary_incidents_mapped_filtered <- st_filter(burglary_incidents_mapped, census_tracts)


census_tracts |> 
   ggplot() +
   geom_sf() +
   geom_sf(data = burglary_incidents_mapped_filtered, size = 0.1)

NA
NA
NA
NA

After performing the spatial join, merge in the census data. Note: Make sure that the final dataset contains all census tracts, even those with zero burglaries.


burglary_census_combo <- st_join(burglary_incidents_mapped, census_tracts, join = st_within, left=FALSE)

burglary_census_combo
Simple feature collection with 1142 features and 44 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -87.02 ymin: 35.99 xmax: -86.557 ymax: 36.34
Geodetic CRS:  NAD83

Part 2: Exploratory Analysis

Perform some exploratory analysis on your prepared dataset.

Classes of the two datasets:


class(census_tracts)
[1] "sf"         "data.frame"
class(burglary_census_combo)
[1] "sf"         "tbl_df"     "tbl"        "data.frame"

Curious as to the highest number of victims in one burglary.


burglary_census_combo |>
  filter(victim_number == max(victim_number, na.rm = TRUE))
Simple feature collection with 1 feature and 44 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -86.69 ymin: 36.15 xmax: -86.69 ymax: 36.15
Geodetic CRS:  NAD83

Limit dataset to non repeated incident numbers and locate the highest number of victims per indcident number.


real_num_burglaries <- burglary_census_combo |>
  group_by(incident_number) |>
  filter(victim_number == max(victim_number, na.rm = TRUE)) |>
  arrange(desc(victim_number))

real_num_burglaries
Simple feature collection with 894 features and 44 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -87.02 ymin: 35.99 xmax: -86.557 ymax: 36.34
Geodetic CRS:  NAD83

Calculate the accurate number of burglaries in each tract.


burglaries_per_tract_real <- real_num_burglaries |> 
  st_drop_geometry() |> 
  group_by(TRACTCE) |> 
  count(name = "num_burglaries") |> 
  arrange(desc(num_burglaries))

burglaries_per_tract_real
NA

Comparing the non filtered number to the result before filtering:


burglaries_per_tract <- burglary_census_combo |> 
  st_drop_geometry() |> 
  group_by(TRACTCE) |> 
  count(name = "num_burglaries") |> 
  arrange(desc(num_burglaries))

burglaries_per_tract
NA

Aggregate the data by census tract. Warning: each incident can appear multiple times if there are multiple victims, so be sure that you aren’t double-counting any incidents.


burglaries_per_tract_real
NA
LS0tDQp0aXRsZTogImNnZF9leHBsb3JhdGlvbl9nZW9zcGF0aWFsX1IiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCg0KIyBpbnN0YWxsLnBhY2thZ2VzKCJzZiIpDQojIGluc3RhbGwucGFja2FnZXMoImxlYWZsZXQiKQ0KbGlicmFyeShzZikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShsZWFmbGV0KQ0KDQpgYGANCg0KIyBBbmFseXppbmcgQWdncmF2YXRlZCBCdXJnbGFyaWVzIGluIERhdmlkc29uIENvdW50eQ0KIyMjIFBhcnQgMTogRGF0YSBQcmVwYXJhdGlvbg0KDQpJIGhhdmUgYmVlbiBwcm92aWRlZCB0aGVzZSB0aHJlZSBkYXRhc2V0cyBmb3IgdGhpcyBwcm9qZWN0Og0KDQoxLiBidXJnbGFyaWVzXzIwMjMuY3N2OiBDb250YWlucyBkYXRhIG9uIHRoZSBhZ2dyYXZhdGVkIGJ1cmdsYXJ5IGluY2lkZW50cyBpbiBEYXZpZHNvbiBDb3VudHkuIFRoaXMgd2FzIG9idGFpbmVkIGZyb20gaHR0cHM6Ly9kYXRhLm5hc2h2aWxsZS5nb3YvUG9saWNlL01ldHJvLU5hc2h2aWxsZS1Qb2xpY2UtRGVwYXJ0bWVudC1JbmNpZGVudHMvMnU2di11ampzLg0KYGBge3J9DQojIHJlYWQgdGhlIGNzdiBvZiBidXJnbGFyaWVzIGRhdGEgaW50byB0aGUgbm90ZWJvb2sNCg0KYnVyZ2xhcnlfaW5jaWRlbnRzIDwtIHJlYWRfY3N2KCcuLi9kYXRhL2J1cmdsYXJpZXNfMjAyMy5jc3YnKQ0KDQojIHZpZXcgdGhlIGRhdGFzZXQNCg0KYnVyZ2xhcnlfaW5jaWRlbnRzDQoNCmBgYA0KDQpJbnZlc3RpZ2F0ZSB0aGUgZXRobmljaXR5IGNvbHVtbiBzb21lOiANCg0KYGBge3J9DQoNCiMgaW52ZXN0aWdhdGUgdGhlIGV0aG5pY2l0eSBjb2x1bW4gc29tZQ0KDQp1bmlxdWUoYnVyZ2xhcnlfaW5jaWRlbnRzW1sidmljdGltX2V0aG5pY2l0eSJdXSkNCg0KYGBgDQpgYGB7cn0NCg0KDQoNCmBgYA0KDQoNCjIuIGNlbnN1cy5jc3Y6IENlbnN1cyB0cmFjdCBsZXZlbCBkYXRhIG9uIHBvcHVsYXRpb24gYW5kIG1lZGlhbiBpbmNvbWUuIFRoaXMgd2FzIG9idGFpbmVkIGZyb20gdGhlIFVTIENlbnN1cyBBbWVyaWNhbiBDb21tdW5pdHkgU3VydmV5Lg0KDQpgYGB7cn0NCg0KIyByZWFkIHRoZSBjc3Ygb2YgY2Vuc3VzIGRhdGEgaW50byB0aGUgbm90ZWJvb2sNCg0KY2Vuc3VzIDwtIHJlYWRfY3N2KCcuLi9kYXRhL2NlbnN1cy5jc3YnKQ0KDQojIHZpZXcgdGhlIGRhdGFzZXQNCg0KY2Vuc3VzDQoNCmBgYA0KDQoNCjMuIERDOiBBIHNoYXBlZmlsZSBjb250YWluaW5nIERhdmlkc29uIENvdW50eSBjZW5zdXMgdHJhY3RzDQoNCmBgYHtyfQ0KDQojIHJlYWQgaW4gdGhlIHNoYXBlIGZpbGUgZGF0YSBmb3IgRGF2aWRzb24gY291bnR5IGNlbnN1cyB0cmFjdHMNCiMgcmVhZCBpbiB0aGUgREMgZmlsZSBkYXRhDQoNCmRhdl9jdHlfY2Vuc3VzX3RyYWN0cyA8LSByZWFkX3NmKCcuLi9kYXRhL0RDL0RDLnNocCcpDQpkYXZfY3R5X2NlbnN1c190cmFjdHMNCg0KYnVyZ2xhcnlfaW5jaWRlbnRzDQpgYGANCmBgYHtyfQ0KZGF2X2N0eV9jZW5zdXNfdHJhY3RzIHw+IA0KICBnZ3Bsb3QoKSArDQogIGdlb21fc2YoKQ0KYGBgDQpgYGB7cn0NCmRhdl9jdHlfY2Vuc3VzX3RyYWN0cyB8PiANCiAgZ2dwbG90KCkgKw0KICBnZW9tX3NmKGFlcyhmaWxsID0gQUxBTkQpKQ0KYGBgDQoNCg0KDQpQZXJmb3JtIGEgc3BhdGlhbCBqb2luIHRvIGRldGVybWluZSB0aGUgY2Vuc3VzIHRyYWN0IGluIHdoaWNoIGVhY2ggYnVyZ2xhcnkgb2NjdXJyZWQuIEhpbnQ6IFlvdSBtYXkgd2FudCB0byBtYWtlIHVzZSBvZiB0aGUgc3RfYXNfc2YgZnVuY3Rpb24gaW4gb3JkZXIgdG8gY29udmVydCB0aGUgYnVyZ2xhcmllcyBkYXRhIGludG8gYW4gc2Ygb2JqZWN0Lg0KDQpgYGB7cn0NCg0KIyBwZXJmb3JtZWQgc3BhdGlhbCBqb2luIA0KDQpidXJnbGFyeV9pbmNpZGVudHMNCg0KYnVyZ2xhcnlfaW5jaWRlbnRzX21hcHBlZCA8LSBzdF9hc19zZigNCiAgYnVyZ2xhcnlfaW5jaWRlbnRzIHw+IA0KICAgIGRyb3BfbmEobGF0aXR1ZGUpIHw+IA0KICAgIGRyb3BfbmEobG9uZ2l0dWRlKSwNCiAgICBjb29yZHMgPSBjKCdsb25naXR1ZGUnLCAnbGF0aXR1ZGUnKSwNCiAgICBjcnMgPSBzdF9jcnMoZGF2X2N0eV9jZW5zdXNfdHJhY3RzKQ0KKQ0KDQpidXJnbGFyeV9pbmNpZGVudHNfbWFwcGVkDQoNCg0KYGBgDQoNClJlbmFtZSBjb2x1bW4gaW4gRGF2aWRzb24gY291bnR5IGNlbnN1cyB0cmFjdCBkYXRhIHNvIHRoYXQgdGhlIG1lcmdlIGdvZXMgbW9yZSBzbW9vdGhseS4NCg0KYGBge3J9DQoNCmRhdl9jdHlfY2Vuc3VzX3RyYWN0cyA8LSByZW5hbWUoZGF2X2N0eV9jZW5zdXNfdHJhY3RzLCB0cmFjdF9uYW1lID0gTkFNRSkNCg0KYGBgDQoNCg0KTWVyZ2UgY2Vuc3VzIGNzdiBkYXRhIHdpdGggZGF2X2N0eV9jZW5zdXNfdHJhY3RzIERDIHNoYXBlIGZpbGUgZGF0YS4NCg0KDQpgYGB7cn0NCg0KY2Vuc3VzX3RyYWN0cyA8LSBtZXJnZShkYXZfY3R5X2NlbnN1c190cmFjdHMsIGNlbnN1cywgYnkueCA9ICJUUkFDVENFIiwgYnkueSA9ICJ0cmFjdCIsIGFsbCA9IFRSVUUpDQoNCmNlbnN1c190cmFjdHMNCiMgYnVyZ2xhcnlfaW5jaWRlbnRzX21hcHBlZA0KDQpgYGANCg0KYGBge3J9DQoNCmNlbnN1c190cmFjdHMgfD4gDQogIGdncGxvdCgpICsNCiAgZ2VvbV9zZigpDQoNCmBgYA0KDQpgYGB7cn0NCg0KYnVyZ2xhcnlfaW5jaWRlbnRzX21hcHBlZF9maWx0ZXJlZCA8LSBzdF9maWx0ZXIoYnVyZ2xhcnlfaW5jaWRlbnRzX21hcHBlZCwgY2Vuc3VzX3RyYWN0cykNCg0KYGBgDQoNCg0KYGBge3J9DQoNCg0KY2Vuc3VzX3RyYWN0cyB8PiANCiAgIGdncGxvdCgpICsNCiAgIGdlb21fc2YoKSArDQogICBnZW9tX3NmKGRhdGEgPSBidXJnbGFyeV9pbmNpZGVudHNfbWFwcGVkX2ZpbHRlcmVkLCBzaXplID0gMC4xKQ0KDQoNCg0KDQpgYGANCg0KDQpBZnRlciBwZXJmb3JtaW5nIHRoZSBzcGF0aWFsIGpvaW4sIG1lcmdlIGluIHRoZSBjZW5zdXMgZGF0YS4gTm90ZTogTWFrZSBzdXJlIHRoYXQgdGhlIGZpbmFsIGRhdGFzZXQgY29udGFpbnMgYWxsIGNlbnN1cyB0cmFjdHMsIGV2ZW4gdGhvc2Ugd2l0aCB6ZXJvIGJ1cmdsYXJpZXMuDQoNCmBgYHtyfQ0KDQpidXJnbGFyeV9jZW5zdXNfY29tYm8gPC0gc3Rfam9pbihidXJnbGFyeV9pbmNpZGVudHNfbWFwcGVkLCBjZW5zdXNfdHJhY3RzLCBqb2luID0gc3Rfd2l0aGluLCBsZWZ0PUZBTFNFKQ0KDQoNCmBgYA0KDQoNCmBgYHtyfQ0KDQpidXJnbGFyeV9jZW5zdXNfY29tYm8NCg0KYGBgDQoNCg0KIyMjIFBhcnQgMjogRXhwbG9yYXRvcnkgQW5hbHlzaXMNCg0KUGVyZm9ybSBzb21lIGV4cGxvcmF0b3J5IGFuYWx5c2lzIG9uIHlvdXIgcHJlcGFyZWQgZGF0YXNldC4NCg0KQ2xhc3NlcyBvZiB0aGUgdHdvIGRhdGFzZXRzOg0KDQpgYGB7cn0NCg0KY2xhc3MoY2Vuc3VzX3RyYWN0cykNCmNsYXNzKGJ1cmdsYXJ5X2NlbnN1c19jb21ibykNCg0KYGBgDQoNCkN1cmlvdXMgYXMgdG8gdGhlIGhpZ2hlc3QgbnVtYmVyIG9mIHZpY3RpbXMgaW4gb25lIGJ1cmdsYXJ5Lg0KDQpgYGB7cn0NCg0KYnVyZ2xhcnlfY2Vuc3VzX2NvbWJvIHw+DQogIGZpbHRlcih2aWN0aW1fbnVtYmVyID09IG1heCh2aWN0aW1fbnVtYmVyLCBuYS5ybSA9IFRSVUUpKQ0KDQpgYGANCkxpbWl0IGRhdGFzZXQgdG8gbm9uIHJlcGVhdGVkIGluY2lkZW50IG51bWJlcnMgYW5kIGxvY2F0ZSB0aGUgaGlnaGVzdCBudW1iZXIgb2YgdmljdGltcyBwZXIgaW5kY2lkZW50IG51bWJlci4gDQoNCmBgYHtyfQ0KDQpyZWFsX251bV9idXJnbGFyaWVzIDwtIGJ1cmdsYXJ5X2NlbnN1c19jb21ibyB8Pg0KICBncm91cF9ieShpbmNpZGVudF9udW1iZXIpIHw+DQogIGZpbHRlcih2aWN0aW1fbnVtYmVyID09IG1heCh2aWN0aW1fbnVtYmVyLCBuYS5ybSA9IFRSVUUpKSB8Pg0KICBhcnJhbmdlKGRlc2ModmljdGltX251bWJlcikpDQoNCnJlYWxfbnVtX2J1cmdsYXJpZXMNCg0KYGBgDQpDYWxjdWxhdGUgdGhlIGFjY3VyYXRlIG51bWJlciBvZiBidXJnbGFyaWVzIGluIGVhY2ggdHJhY3QuDQoNCg0KYGBge3J9DQoNCmJ1cmdsYXJpZXNfcGVyX3RyYWN0X3JlYWwgPC0gcmVhbF9udW1fYnVyZ2xhcmllcyB8PiANCiAgc3RfZHJvcF9nZW9tZXRyeSgpIHw+IA0KICBncm91cF9ieShUUkFDVENFKSB8PiANCiAgY291bnQobmFtZSA9ICJudW1fYnVyZ2xhcmllcyIpIHw+IA0KICBhcnJhbmdlKGRlc2MobnVtX2J1cmdsYXJpZXMpKQ0KDQpidXJnbGFyaWVzX3Blcl90cmFjdF9yZWFsDQoNCmBgYA0KQ29tcGFyaW5nIHRoZSBub24gZmlsdGVyZWQgbnVtYmVyIHRvIHRoZSByZXN1bHQgYmVmb3JlIGZpbHRlcmluZzoNCg0KDQpgYGB7cn0NCg0KYnVyZ2xhcmllc19wZXJfdHJhY3QgPC0gYnVyZ2xhcnlfY2Vuc3VzX2NvbWJvIHw+IA0KICBzdF9kcm9wX2dlb21ldHJ5KCkgfD4gDQogIGdyb3VwX2J5KFRSQUNUQ0UpIHw+IA0KICBjb3VudChuYW1lID0gIm51bV9idXJnbGFyaWVzIikgfD4gDQogIGFycmFuZ2UoZGVzYyhudW1fYnVyZ2xhcmllcykpDQoNCmJ1cmdsYXJpZXNfcGVyX3RyYWN0DQoNCmBgYA0KDQpBZ2dyZWdhdGUgdGhlIGRhdGEgYnkgY2Vuc3VzIHRyYWN0LiBXYXJuaW5nOiBlYWNoIGluY2lkZW50IGNhbiBhcHBlYXIgbXVsdGlwbGUgdGltZXMgaWYgdGhlcmUgYXJlIG11bHRpcGxlIHZpY3RpbXMsIHNvIGJlIHN1cmUgdGhhdCB5b3UgYXJlbid0IGRvdWJsZS1jb3VudGluZyBhbnkgaW5jaWRlbnRzLg0KDQoNCmBgYHtyfQ0KDQpidXJnbGFyaWVzX3Blcl90cmFjdF9yZWFsDQoNCmBgYA0KDQoNCg0KYGBge3J9DQoNCg0KDQpgYGANCg0KYGBge3J9DQoNCmBgYA0KDQpgYGB7cn0NCg0KYGBgDQoNCmBgYHtyfQ0KDQpgYGANCg0KYGBge3J9DQoNCmBgYA0KDQpgYGB7cn0NCg0KYGBgDQoNCmBgYHtyfQ0KDQpgYGANCg0KYGBge3J9DQoNCmBgYA0KDQpgYGB7cn0NCg0KYGBgDQoNCmBgYHtyfQ0KDQpgYGANCg0KDQo=